import re

import xlrd

# Workbook whose first sheet holds the student roster (name / student number).
student_mapping_excel = "E:\\xxhzb_13015_青岛恒星科技学院_13015-XX-003_2425_001.xlsx"

# Student name -> student number; filled in place by load_by_header().
stu_name_mapping = {}

def get_data_by_row(filename, start_rowx, end_rowx, start_colx, end_colx,
                    keep_empty=False):
    """Read a rectangular region of the workbook's first sheet as strings.

    Args:
        filename: Path handed to ``xlrd.open_workbook``.
        start_rowx: First row index to read (inclusive).
        end_rowx: Row index to stop at (exclusive); ``0`` means "through the
            last row of the sheet" (``table.nrows``).
        start_colx: First column index to read (inclusive).
        end_colx: Column index to stop at (exclusive); ``0`` means "through
            the last column of the sheet" (``table.ncols``).
        keep_empty: When false (the default, and the historical behaviour),
            empty cells are removed from each row, so later values shift
            left and a list position no longer identifies a sheet column.
            When true, empty cells are kept as ``''`` so that position ``i``
            of every row is column ``start_colx + i``.

    Returns:
        A list of rows, each a list of ``str(cell_value)``. Rows whose cells
        are all empty are omitted in both modes.

    NOTE(review): xlrd 2.x documents support for ``.xls`` only; reading an
    ``.xlsx`` path presumably requires an older xlrd -- confirm the installed
    version.
    """
    data = xlrd.open_workbook(filename)
    table = data.sheets()[0]
    if end_rowx == 0:
        end_rowx = table.nrows
    if end_colx == 0:
        end_colx = table.ncols

    result = []
    for rowx in range(start_rowx, end_rowx):
        # Read and stringify every cell exactly once.
        cells = [str(table.cell_value(rowx, colx))
                 for colx in range(start_colx, end_colx)]
        if not any(cells):
            # Entirely blank row: never reported.
            continue
        if not keep_empty:
            cells = [cell for cell in cells if cell != '']
        result.append(cells)
    return result


def load_by_header():
    """Populate ``stu_name_mapping`` (name -> student number) from the sheet.

    The columns are located by their titles in row 0 ('姓名' for the name,
    '学号' for the student number). Data is then read from row index 2 to the
    end of the sheet; rows with an empty name or number are skipped. When a
    name occurs more than once, the last row wins.

    Rows are read with ``keep_empty=True`` so that the positions found in the
    header stay valid for data rows that contain blank cells.

    Raises:
        ValueError: If either header title is not present in row 0.
    """
    name_col = None
    num_col = None
    header_rows = get_data_by_row(student_mapping_excel, 0, 1, 0, 0,
                                  keep_empty=True)
    for title in header_rows:
        for i, item in enumerate(title):
            if item == '姓名':
                name_col = i
            elif item == '学号':
                num_col = i
    if name_col is None or num_col is None:
        raise ValueError(
            "header row of %r must contain both '姓名' and '学号' columns"
            % (student_mapping_excel,)
        )

    # Row index 1 is deliberately not read, as in the original code.
    # NOTE(review): presumably a secondary header/description row -- confirm.
    data = get_data_by_row(student_mapping_excel, 2, 0, 0, 0, keep_empty=True)
    for entry in data:
        name = entry[name_col]
        num = entry[num_col]
        if name == '' or num == '':
            # Incomplete record; nothing meaningful to map.
            continue
        stu_name_mapping[name] = num
# Report files whose owners should be looked up in the roster.
filenames = [
    '基于小程序的健身教练预约系统-中期报告-  陈浩然(2)(2)  .pdf'
]


# First run of Chinese characters in the text it is applied to. The previous
# pattern had a greedy '.*' prefix, which left only the final character in the
# capture group and made the whole match include any non-name prefix.
student_name_pattern = re.compile(r'([\u4e00-\u9fa5]+)')
load_by_header()
for filename in filenames:
    if not (filename.lower().endswith('.pdf') and '中期报告' in filename):
        continue
    # Try to extract the student's name from the file name: it is expected in
    # the last '-'-separated segment, possibly surrounded by spaces and
    # followed by suffixes such as "(2)".
    student_name_match = student_name_pattern.search(filename.split('-')[-1])
    if not student_name_match:
        continue
    student_name = student_name_match.group(1)
    # .get() yields None when the name is not in the roster.
    student_id = stu_name_mapping.get(student_name)
    print(student_id, student_name)